In [1]:
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
import seaborn as sns
from datetime import timedelta
from matplotlib.ticker import MaxNLocator
%matplotlib inline

In [2]:
# plotting parameters
figure_size = (6.25984252, 4.1456693) # = 15.9, 10.53 cm

def rgb(rgb_255_tuple):
    return tuple(v/255 for v in rgb_255_tuple)

colour_op = rgb((13, 103, 53))
colour_s = rgb((254, 205, 8))
colour_p = rgb((237, 29, 36))

sns.set_palette('muted')

Raw data processing

Inflow profile calculation


In [3]:
df = pd.read_csv('CS2 data 1min_2017-09-26.pdi.gz')

# pivot to one column per tag; timestamps are shifted back by one interval
time_interval = pd.Timedelta(minutes=1)
df = df.pivot_table(values='Value', columns='TagName',
                    index=(pd.to_datetime(df['Date'].str.cat(df['Time'], sep=' ')) - time_interval))
df.index.name = 'DateTime'

df['Time_30'] = df.index.floor('30min').time

df['EskomDayOfWeek'] = df.index.dayofweek + 1
# replace holidays with Eskom days
h_path = r'..\holidays.csv'
holidays = np.genfromtxt(h_path, dtype='datetime64[D]', delimiter=',', usecols=0)
holidays = list(holidays)
holidays_numbers = np.genfromtxt(h_path, dtype=int, delimiter=',', usecols=1)
holidays_numbers = list(holidays_numbers)
for h_n, h_d in zip(holidays_numbers, holidays):
    df.loc[df.index.date==h_d,'EskomDayOfWeek'] = h_n
# drop weekends
df = df[df['EskomDayOfWeek'] <= 5]
df.drop('EskomDayOfWeek', axis=1)  # note: result not assigned, so the column is retained (see output below)

df.head(5)


Out[3]:
TagName KDCE_S10_12PMP00_00ILT#502.FA_PV KDCE_S10_12PMP00_00ILT#503.FA_PV KDCE_S10_12PMP00_01ILT#503.FA_PV KDCE_S10_12PMP02_Machine.FA_RF KDCE_S10_12PMP03_Machine.FA_RF KDCE_S10_12PMP04_Machine.FA_RF KDCE_S10_12PMP06_Machine.FA_RF KDCE_S10_12PMP07_Machine.FA_RF KDCE_S10_27PMP00_00ILT#504.FA_PV KDCE_S10_27PMP00_00ILT#505.FA_PV KDCE_S10_27PMP00_01ILT#505.FA_PV KDCE_S10_27PMP02_Machine.FA_RF KDCE_S10_27PMP03_Machine.FA_RF KDCE_S10_27PMP04_Machine.FA_RF KDCE_S10_27PMP05_Machine.FA_RF Time_30 EskomDayOfWeek
DateTime
2016-09-01 00:00:00 52.967242 40.914352 42.153806 1.0 0.0 1.0 0.0 1.0 43.670489 49.681713 64.149307 0.0 1.0 1.0 NaN 00:00:00 4
2016-09-01 00:01:00 53.067131 40.914352 42.174387 1.0 0.0 1.0 0.0 1.0 43.621620 49.681713 64.149307 0.0 1.0 1.0 NaN 00:00:00 4
2016-09-01 00:02:00 53.067131 40.914352 42.221463 1.0 0.0 1.0 0.0 1.0 43.732320 49.681713 64.149307 0.0 1.0 1.0 NaN 00:00:00 4
2016-09-01 00:03:00 53.210744 40.914352 42.239582 1.0 0.0 1.0 0.0 1.0 43.664848 49.681713 64.149307 0.0 1.0 1.0 NaN 00:00:00 4
2016-09-01 00:04:00 53.211807 40.914352 42.265280 1.0 0.0 1.0 0.0 1.0 43.689365 49.681713 64.149307 0.0 1.0 1.0 NaN 00:00:00 4

In [4]:
#df.drop(['KDCE_S10_12PMP00_01ILT#502.FA_PV', 'KDCE_S10_27PMP00_01ILT#504.FA_PV'], axis=1, inplace=True) #broken dam sensor

df = df.rename(columns = {'KDCE_S10_12PMP00_00ILT#502.FA_PV': '12L D1'})
df['12L D2'] = df[['KDCE_S10_12PMP00_00ILT#503.FA_PV', 'KDCE_S10_12PMP00_01ILT#503.FA_PV']].mean(axis=1)

df = df.rename(columns = {'KDCE_S10_27PMP00_00ILT#504.FA_PV': '27L D1'})
df['27L D2'] = df[['KDCE_S10_27PMP00_00ILT#505.FA_PV', 'KDCE_S10_27PMP00_01ILT#505.FA_PV']].mean(axis=1)

df.drop(['KDCE_S10_12PMP00_00ILT#503.FA_PV', 'KDCE_S10_12PMP00_01ILT#503.FA_PV',
         'KDCE_S10_27PMP00_00ILT#505.FA_PV', 'KDCE_S10_27PMP00_01ILT#505.FA_PV'], axis=1, inplace=True)

df['12L Level'] = df[['12L D1', '12L D2']].mean(axis=1)
df['27L Level'] = df[['27L D1', '27L D2']].mean(axis=1)

df.drop(['12L D1', '12L D2', '27L D1', '27L D2'], axis=1, inplace=True)

df.head(5)


Out[4]:
TagName KDCE_S10_12PMP02_Machine.FA_RF KDCE_S10_12PMP03_Machine.FA_RF KDCE_S10_12PMP04_Machine.FA_RF KDCE_S10_12PMP06_Machine.FA_RF KDCE_S10_12PMP07_Machine.FA_RF KDCE_S10_27PMP02_Machine.FA_RF KDCE_S10_27PMP03_Machine.FA_RF KDCE_S10_27PMP04_Machine.FA_RF KDCE_S10_27PMP05_Machine.FA_RF Time_30 EskomDayOfWeek 12L Level 27L Level
DateTime
2016-09-01 00:00:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.250660 50.293000
2016-09-01 00:01:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.305750 50.268565
2016-09-01 00:02:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.317519 50.323915
2016-09-01 00:03:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.393856 50.290179
2016-09-01 00:04:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.400812 50.302437

In [5]:
status_col_dict = {}
for c in df.columns:
    if '_Machine.FA_RF' in c: # pump status
        item = c.split('_')[2].split('PMP')
        level = item[0] + 'L'
        pump_num = 'P' + item[1][1]
        status_col_dict[c] = level + ' ' + pump_num
        
df = df.rename(columns = status_col_dict)
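
For example, here is how the parsing above maps one raw tag name to its short pump label (a standalone sketch of the same split logic):

tag = 'KDCE_S10_12PMP02_Machine.FA_RF'
item = tag.split('_')[2].split('PMP')   # ['12', '02'] → level and pump number
print(item[0] + 'L', 'P' + item[1][1])  # prints: 12L P2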

In [6]:
df.head(5)


Out[6]:
TagName 12L P2 12L P3 12L P4 12L P6 12L P7 27L P2 27L P3 27L P4 27L P5 Time_30 EskomDayOfWeek 12L Level 27L Level
DateTime
2016-09-01 00:00:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.250660 50.293000
2016-09-01 00:01:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.305750 50.268565
2016-09-01 00:02:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.317519 50.323915
2016-09-01 00:03:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.393856 50.290179
2016-09-01 00:04:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.400812 50.302437

In [7]:
df = df.rename(columns = {'12L P2': '12L P1',
                          '12L P3': '12L P2',
                          '12L P4': '12L P3',
                          '12L P6': '12L P4',
                          '12L P7': '12L P5',
                          '27L P2': '27L P1',
                          '27L P3': '27L P2',
                          '27L P4': '27L P3',
                          '27L P5': '27L P4'})
df.head(5)


Out[7]:
TagName 12L P1 12L P2 12L P3 12L P4 12L P5 27L P1 27L P2 27L P3 27L P4 Time_30 EskomDayOfWeek 12L Level 27L Level
DateTime
2016-09-01 00:00:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.250660 50.293000
2016-09-01 00:01:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.305750 50.268565
2016-09-01 00:02:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.317519 50.323915
2016-09-01 00:03:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.393856 50.290179
2016-09-01 00:04:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.400812 50.302437

Process selected data


In [8]:
def calculate_inflows(level_name, pump_flowrates, capacity, feeding_level_name=None):
    """Back-calculate pumped outflow and dam inflow for a level from the module-level df.

    feeding_level_name is the level whose pumps discharge into this level.
    """
    calc_pump_flows = [np.nan]
    calc_inflows = [np.nan]

    for i in range(1, len(df.index)):
        pump_status = {}
        for j,_ in enumerate(pump_flowrates):
            pump_str = 'P' + str(j+1)
            pump_status[pump_str] = df[level_name + ' ' + pump_str].values[i]
        
        l_new = df[level_name + ' Level'].values[i]
        l_old = df[level_name + ' Level'].values[i-1]
        pump_flow_from_prev_lev = 0 if feeding_level_name is None else df[feeding_level_name + ' PumpFlow'].values[i]
        
        # NaNs in any of the inputs simply propagate through the arithmetic below
        delta_level = l_new - l_old
        if delta_level != 0:
            ans_outflow_pumps = 0
            for status, flowrate in zip(pump_status.values(), pump_flowrates):
                ans_outflow_pumps += status * flowrate

            # level change (% of capacity) over the one-minute step, converted to an average flow,
            # plus pumped outflow, minus the water pumped in from the feeding level
            ans_inflow = (delta_level / 100 * capacity) / 60 + ans_outflow_pumps - pump_flow_from_prev_lev
        else:
            ans_outflow_pumps = np.nan
            ans_inflow = np.nan

        calc_pump_flows.append(ans_outflow_pumps)
        calc_inflows.append(ans_inflow)

    df[level_name + ' PumpFlow'] = calc_pump_flows
    df[level_name + ' Inflow'] = calc_inflows
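
In words, the loop performs a one-minute mass balance over each dam; the ans_inflow line above is

$Q_{\text{in}} = \dfrac{\Delta L}{100}\cdot\dfrac{C}{60} + Q_{\text{pumps}} - Q_{\text{feed}}$

where $\Delta L$ is the level change in % over the minute, $C$ the dam capacity, $Q_{\text{pumps}}$ the outflow of this level's running pumps, and $Q_{\text{feed}}$ the flow pumped in from the feeding level (all flows in the units of pump_flowrates).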

In [9]:
calculate_inflows('27L', [236.1]*4, 3000000)
calculate_inflows('12L', [194.6]*5, 3000000, '27L')

In [10]:
df.head(5)


Out[10]:
TagName 12L P1 12L P2 12L P3 12L P4 12L P5 27L P1 27L P2 27L P3 27L P4 Time_30 EskomDayOfWeek 12L Level 27L Level 27L PumpFlow 27L Inflow 12L PumpFlow 12L Inflow
DateTime
2016-09-01 00:00:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.250660 50.293000 NaN NaN NaN NaN
2016-09-01 00:01:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.305750 50.268565 NaN NaN 583.8 NaN
2016-09-01 00:02:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.317519 50.323915 NaN NaN 583.8 NaN
2016-09-01 00:03:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.393856 50.290179 NaN NaN 583.8 NaN
2016-09-01 00:04:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.400812 50.302437 NaN NaN 583.8 NaN
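
As a quick sanity check against the table above (a sketch using the 194.6 flow rate passed to calculate_inflows): at 00:01 three of the five 12L pumps are running, so the 12L PumpFlow of 583.8 is simply 3 × 194.6.

running = df[['12L P1', '12L P2', '12L P3', '12L P4', '12L P5']].iloc[1].sum()
print(running * 194.6, df['12L PumpFlow'].iloc[1])  # both ≈ 583.8 (three pumps × 194.6)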

Export data for use in simulation

Inflow profile


In [11]:
df2 = df.copy()

date_start = '2016-10-25'
date_end = '2016-10-26'

df2 = df2[(df2.index>=date_start) & (df2.index<date_end)]

inflow_profiles = []
overall_day_completeness_count = 0
overall_day_completeness_max = 0
overall_completeness_count = 0
overall_completeness_max = 0
for l in ['27L', '12L']:
    grouped = df2.set_index('Time_30')[l + ' Inflow'].groupby('Time_30')
    
    grouped_mean = grouped.mean()
    inflow_profiles.append(grouped_mean)
    
    # first %: share of the 48 × 30 one-minute samples present; second %: share of the 48 half-hourly means that are non-NaN
    print('{} completeness: {:6.2f}% → {:6.2f}%'.format(l, grouped.count().sum()/48/30*100, grouped_mean.count()/48*100))
    overall_day_completeness_count += grouped.count().sum()
    overall_day_completeness_max += 48*30
    overall_completeness_count += grouped_mean.count()
    overall_completeness_max += 48
print('                   ------   -------')
print('All completeness: {:6.2f}% → {:6.2f}%'.format(overall_day_completeness_count/overall_day_completeness_max*100, overall_completeness_count/overall_completeness_max*100))
    
pd.DataFrame(inflow_profiles).T.to_csv(r'..\..\simulations\Case_study_2\input\CS2_dam_inflow_profiles.csv.gz', compression='gzip')


27L completeness: 100.00% → 100.00%
12L completeness: 100.00% → 100.00%
                   ------   -------
All completeness: 100.00% → 100.00%

Data for validation of the simulation


In [12]:
df_real = pd.read_csv('CS2 data 1s 10-25_2017-09-26_pivot.csv.gz')
df_real = df_real.set_index('DateTime')
df_real.index = pd.to_datetime(df_real.index)
df_real.head(5)


Out[12]:
KDCE_S10_12PMP00_00ILT#502.FA_PV KDCE_S10_12PMP00_00ILT#503.FA_PV KDCE_S10_12PMP00_01ILT#503.FA_PV KDCE_S10_12PMP02_Machine.FA_RF KDCE_S10_12PMP03_Machine.FA_RF KDCE_S10_12PMP04_Machine.FA_RF KDCE_S10_12PMP06_Machine.FA_RF KDCE_S10_12PMP07_Machine.FA_RF KDCE_S10_27PMP00_00ILT#504.FA_PV KDCE_S10_27PMP00_00ILT#505.FA_PV KDCE_S10_27PMP00_01ILT#505.FA_PV KDCE_S10_27PMP02_Machine.FA_RF KDCE_S10_27PMP03_Machine.FA_RF KDCE_S10_27PMP04_Machine.FA_RF KDCE_S10_27PMP05_Machine.FA_RF
DateTime
2016-10-25 00:00:00 47.222221 47.569447 31.944445 1.0 0.0 1.0 0.0 1.0 55.989582 64.641205 74.479172 0.0 1.0 0.0 1.0
2016-10-25 00:00:01 47.222221 47.569447 31.944445 1.0 0.0 1.0 0.0 1.0 55.989582 64.641205 74.479172 0.0 1.0 0.0 1.0
2016-10-25 00:00:02 47.222221 47.569447 31.944445 1.0 0.0 1.0 0.0 1.0 55.989582 64.641205 74.479172 0.0 1.0 0.0 1.0
2016-10-25 00:00:03 47.222221 47.569447 31.944445 1.0 0.0 1.0 0.0 1.0 55.989582 64.641205 74.479172 0.0 1.0 0.0 1.0
2016-10-25 00:00:04 47.222221 47.569447 31.944445 1.0 0.0 1.0 0.0 1.0 55.989582 64.641205 74.479172 0.0 1.0 0.0 1.0

In [13]:
df_real['12L Status'] = df_real[[col for col in df_real.columns if ('12PMP' in col and 'Machine.FA_RF' in col)]].sum(axis=1)
df_real['27L Status'] = df_real[[col for col in df_real.columns if ('27PMP' in col and 'Machine.FA_RF' in col)]].sum(axis=1)
df_real.drop([col for col in df_real.columns if 'Machine.FA_RF' in col], axis=1, inplace=True)

#df_real.drop(['KDCE_S10_12PMP00_01ILT#502.FA_PV', 'KDCE_S10_27PMP00_01ILT#504.FA_PV'], axis=1, inplace=True) #broken dam sensor
df_real = df_real.rename(columns = {'KDCE_S10_12PMP00_00ILT#502.FA_PV': '12L D1'})
df_real['12L D2'] = df_real[['KDCE_S10_12PMP00_00ILT#503.FA_PV', 'KDCE_S10_12PMP00_01ILT#503.FA_PV']].mean(axis=1)

df_real = df_real.rename(columns = {'KDCE_S10_27PMP00_00ILT#504.FA_PV': '27L D1'})
df_real['27L D2'] = df_real[['KDCE_S10_27PMP00_00ILT#505.FA_PV', 'KDCE_S10_27PMP00_01ILT#505.FA_PV']].mean(axis=1)

df_real.drop(['KDCE_S10_12PMP00_00ILT#503.FA_PV', 'KDCE_S10_12PMP00_01ILT#503.FA_PV',
         'KDCE_S10_27PMP00_00ILT#505.FA_PV', 'KDCE_S10_27PMP00_01ILT#505.FA_PV'], axis=1, inplace=True)


# each level has two dams of equal capacity (the 3*1000000 factor), so the combined level is the mean of the two dam levels
df_real['12L Total water'] = 3*1000000/100 * df_real[['12L D1', '12L D2']].sum(axis=1)
df_real['27L Total water'] = 3*1000000/100 * df_real[['27L D1', '27L D2']].sum(axis=1)
df_real['12L Level'] = df_real['12L Total water']/(2*3*1000000) * 100
df_real['27L Level'] = df_real['27L Total water']/(2*3*1000000) * 100

df_real.dropna(axis=1, how='any', inplace=True)
df_real.head(5)


Out[13]:
12L D1 27L D1 12L Status 27L Status 12L D2 27L D2 12L Total water 27L Total water 12L Level 27L Level
DateTime
2016-10-25 00:00:00 47.222221 55.989582 3.0 2.0 39.756946 69.560188 2.609375e+06 3.766493e+06 43.489583 62.774885
2016-10-25 00:00:01 47.222221 55.989582 3.0 2.0 39.756946 69.560188 2.609375e+06 3.766493e+06 43.489583 62.774885
2016-10-25 00:00:02 47.222221 55.989582 3.0 2.0 39.756946 69.560188 2.609375e+06 3.766493e+06 43.489583 62.774885
2016-10-25 00:00:03 47.222221 55.989582 3.0 2.0 39.756946 69.560188 2.609375e+06 3.766493e+06 43.489583 62.774885
2016-10-25 00:00:04 47.222221 55.989582 3.0 2.0 39.756946 69.560188 2.609375e+06 3.766493e+06 43.489583 62.774885
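
Because both dams on a level are given the same capacity above, the combined level reduces to the plain mean of the two dam levels; a quick consistency check (sketch):

assert np.allclose(df_real['12L Level'], df_real[['12L D1', '12L D2']].mean(axis=1))
assert np.allclose(df_real['27L Level'], df_real[['27L D1', '27L D2']].mean(axis=1))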

In [14]:
date_range_start = '2016-10-25'
date_range_end = '2016-10-26'
df_real2 = df_real.loc[date_range_start:date_range_end]
df_real2[['12L Level','27L Level']].plot()
df_real2[['12L Status','27L Status']].plot()
plt.show()
plt.close('all')



Save simulation testing data


In [15]:
df_real2[['12L Status', '27L Status', '12L Level', '27L Level']].to_csv(r'..\..\simulations\Case_study_2\input\CS2_data_for_validation.csv.gz', compression='gzip')

Now perform the simulation in validation mode

If the simulation outputs closely match the real data, the simulation is considered valid; the match is quantified below with R² and RMSE

Run the simulation in validation_mode, using the recorded initial dam levels and following the recorded pump statuses instead of the scheduler


In [16]:
df_sim = pd.read_csv(r'..\..\simulations\Case_study_2\output\CS2_simulation_data_export_validation.csv.gz')
df_sim['time_clean'] = pd.to_datetime(df_sim['seconds'], unit='s') + timedelta(days=17099)  # 17099 days after 1970-01-01 is 2016-10-25
df_sim.head(5)


Out[16]:
seconds 27L Level 27L Status 12L Level 12L Status Eskom ToU Pump system total power time_clean
0 0 62.774885 2.0 43.489583 3.0 3 13821.0 2016-10-25 00:00:00
1 1 62.775565 2.0 43.487235 3.0 3 13821.0 2016-10-25 00:00:01
2 2 62.776246 2.0 43.484887 3.0 3 13821.0 2016-10-25 00:00:02
3 3 62.776926 2.0 43.482539 3.0 3 13821.0 2016-10-25 00:00:03
4 4 62.777606 2.0 43.480191 3.0 3 13821.0 2016-10-25 00:00:04

In [17]:
def rmse(real, sim):
    return np.sqrt(((real - sim) ** 2).mean())

def r2(x, y):
    return stats.pearsonr(x,y)[0] ** 2
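
The four comparisons below are done one series at a time; the same metrics can also be tabulated in one go (a sketch, assuming df_real2 and df_sim are aligned sample-for-sample, as the cells below already rely on):

compared = ['27L Status', '27L Level', '12L Status', '12L Level']
metrics = pd.DataFrame({c: {'R squared': r2(df_real2[c].values, df_sim[c].values),
                            'RMSE': rmse(df_real2[c].values, df_sim[c].values)}
                        for c in compared})
print(metrics)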

Comparison of simulation and actual data

27L pump status


In [18]:
x = pd.to_datetime(df_real2.index.values).time
y1 = df_real2['27L Status'].values
y2 = df_sim['27L Status'].values

print('R squared = ', r2(y1, y2))
print('RMSE = ', rmse(y1, y2))


R squared =  1.0
RMSE =  0.0

In [19]:
fig, ax1 = plt.subplots(figsize=(figure_size[0], figure_size[1]/1.5), dpi=1200)

x = df_sim['time_clean']

ax1.plot(x, df_real2['27L Status'], marker=None, label="Actual data", zorder=10)
ax1.plot(x, df_sim['27L Status'], marker=None, label="Simulation data", zorder=11, color=sns.color_palette('muted')[2])

ax1.set_xlabel('Time of day')
ax1.set_ylabel('Number of pumps running')
ax1.set_ylim([0, 4])
ax1.yaxis.set_major_locator(MaxNLocator(integer=True))

ax1.axvspan('2016-10-25 00:00:00','2016-10-25 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
ax1.axvspan('2016-10-25 06:00:00','2016-10-25 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
ax1.axvspan('2016-10-25 07:00:00','2016-10-25 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
ax1.axvspan('2016-10-25 10:00:00','2016-10-25 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-10-25 18:00:00','2016-10-25 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
ax1.axvspan('2016-10-25 20:00:00','2016-10-25 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-10-25 22:00:00','2016-10-25 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)

ax1.xaxis.set_major_locator(mdates.HourLocator())
ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate(rotation=90)
ax1.set_xlim((min(x),max(x)))

ax1.grid(which='major', alpha=0.5)

handles, labels = ax1.get_legend_handles_labels()
order = [2, 0, 3, 1, 4]
ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)

fig.tight_layout()
plt.show()
fig.savefig('output/CS2_validation_27l_status.png', bbox_inches='tight')
plt.close('all')


27L dam level


In [20]:
x = pd.to_datetime(df_real2.index.values).time
y1 = df_real2['27L Level'].values
y2 = df_sim['27L Level'].values

print('R squared = ', r2(y1, y2))
print('RMSE = ', rmse(y1, y2))


R squared =  0.995293454095
RMSE =  0.689423123644

In [21]:
fig, ax1 = plt.subplots(figsize=figure_size, dpi=1200)

x = df_sim['time_clean']

ax1.plot(x, df_real2['27L Level'], marker=None, label="Actual data", zorder=10)
ax1.plot(x, df_sim['27L Level'], marker=None, label="Simulation data", zorder=11, color=sns.color_palette('muted')[2])

ax1.set_xlabel('Time of day')
ax1.set_ylabel('Dam level (%)')
ax1.set_ylim([0, 100])

ax1.axvspan('2016-10-25 00:00:00','2016-10-25 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
ax1.axvspan('2016-10-25 06:00:00','2016-10-25 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
ax1.axvspan('2016-10-25 07:00:00','2016-10-25 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
ax1.axvspan('2016-10-25 10:00:00','2016-10-25 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-10-25 18:00:00','2016-10-25 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
ax1.axvspan('2016-10-25 20:00:00','2016-10-25 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-10-25 22:00:00','2016-10-25 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)

ax1.xaxis.set_major_locator(mdates.HourLocator())
ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate(rotation=90)
ax1.set_xlim((min(x),max(x)))

ax1.grid(which='major', alpha=0.5)

handles, labels = ax1.get_legend_handles_labels()
order = [2, 0, 3, 1, 4]
ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)

fig.tight_layout()
plt.show()
fig.savefig('output/CS2_validation_27l_level.png', bbox_inches='tight')
plt.close('all')


12L pump status


In [22]:
x = pd.to_datetime(df_real2.index.values).time
y1 = df_real2['12L Status'].values
y2 = df_sim['12L Status'].values

print('R squared = ', r2(y1, y2))
print('RMSE = ', rmse(y1, y2))


R squared =  1.0
RMSE =  0.0

In [23]:
fig, ax1 = plt.subplots(figsize=(figure_size[0], figure_size[1]/1.5), dpi=1200)

x = df_sim['time_clean']

ax1.plot(x, df_real2['12L Status'], marker=None, label="Actual data", zorder=10)
ax1.plot(x, df_sim['12L Status'], marker=None, label="Simulation data", zorder=11, color=sns.color_palette('muted')[2])

ax1.set_xlabel('Time of day')
ax1.set_ylabel('Number of pumps running')
ax1.set_ylim([0, 4])
ax1.yaxis.set_major_locator(MaxNLocator(integer=True))

ax1.axvspan('2016-10-25 00:00:00','2016-10-25 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
ax1.axvspan('2016-10-25 06:00:00','2016-10-25 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
ax1.axvspan('2016-10-25 07:00:00','2016-10-25 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
ax1.axvspan('2016-10-25 10:00:00','2016-10-25 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-10-25 18:00:00','2016-10-25 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
ax1.axvspan('2016-10-25 20:00:00','2016-10-25 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-10-25 22:00:00','2016-10-25 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)

ax1.xaxis.set_major_locator(mdates.HourLocator())
ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate(rotation=90)
ax1.set_xlim((min(x),max(x)))

ax1.grid(which='major', alpha=0.5)

handles, labels = ax1.get_legend_handles_labels()
order = [2, 0, 3, 1, 4]
ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)

fig.tight_layout()
plt.show()
fig.savefig('output/CS2_validation_12l_status.png', bbox_inches='tight')
plt.close('all')


12L dam level


In [24]:
x = pd.to_datetime(df_real2.index.values).time
y1 = df_real2['12L Level'].values
y2 = df_sim['12L Level'].values

print('R squared = ', r2(y1, y2))
print('RMSE = ', rmse(y1, y2))


R squared =  0.996963541361
RMSE =  0.71226412034

In [25]:
fig, ax1 = plt.subplots(figsize=figure_size, dpi=1200)

x = df_sim['time_clean']

ax1.plot(x, df_real2['12L Level'], marker=None, label="Actual data", zorder=10)
ax1.plot(x, df_sim['12L Level'], marker=None, label="Simulation data", zorder=11, color=sns.color_palette('muted')[2])

ax1.set_xlabel('Time of day')
ax1.set_ylabel('Dam level (%)')
ax1.set_ylim([0, 100])

ax1.axvspan('2016-10-25 00:00:00','2016-10-25 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
ax1.axvspan('2016-10-25 06:00:00','2016-10-25 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
ax1.axvspan('2016-10-25 07:00:00','2016-10-25 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
ax1.axvspan('2016-10-25 10:00:00','2016-10-25 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-10-25 18:00:00','2016-10-25 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
ax1.axvspan('2016-10-25 20:00:00','2016-10-25 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-10-25 22:00:00','2016-10-25 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)

ax1.xaxis.set_major_locator(mdates.HourLocator())
ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate(rotation=90)
ax1.set_xlim((min(x),max(x)))

ax1.grid(which='major', alpha=0.5)

handles, labels = ax1.get_legend_handles_labels()
order = [2, 0, 3, 1, 4]
ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)

fig.tight_layout()
plt.show()
fig.savefig('output/CS2_validation_12l_level.png', bbox_inches='tight')
plt.close('all')


Processing of x-factored simulation data


In [26]:
def tou_shade(ax, date):
    y_m_d = '{}-{:02}-{:02}'.format(date.year, date.month, date.day)
    ax.axvspan(y_m_d + ' 00:00:00', y_m_d + ' 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
    ax.axvspan(y_m_d + ' 06:00:00', y_m_d + ' 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
    ax.axvspan(y_m_d + ' 07:00:00', y_m_d + ' 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
    ax.axvspan(y_m_d + ' 10:00:00', y_m_d + ' 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
    ax.axvspan(y_m_d + ' 18:00:00', y_m_d + ' 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
    ax.axvspan(y_m_d + ' 20:00:00', y_m_d + ' 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
    ax.axvspan(y_m_d + ' 22:00:00', y_m_d + ' 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)

def sim_plot(case_study, factor, mode, level_name, level_limits, x, y, y_lim=100, save_fig=False):
    """Plot dam level + pump status ('level' mode) or power ('power'/'power_raw' modes)
    for the simulation results currently loaded into the module-level df."""
    fig, ax1 = plt.subplots(figsize=figure_size, dpi=1200)

    if mode != 'power' and mode != 'power_raw':
        for l in level_limits:
            ax1.plot([x[0], x[len(x)-1]], [l, l], ls=':', color=sns.color_palette('muted')[0], label='Dam level limits')

        lines1 = ax1.plot(x, df[level_name + ' Level'])

        ax2 = ax1.twinx()
        lines2 = ax2.plot(x, df[level_name + ' Status'], color=sns.color_palette('muted')[2])
        
        tou_shade(ax2, x[0])

        ax1.set_ylabel('Dam level (%)')
        ax1.set_ylim([0, y_lim])
        ax2.set_ylabel('Number of pumps running')
        ax2.yaxis.set_major_locator(MaxNLocator(integer=True))
        
        ax1.set_zorder(ax2.get_zorder()+1)
        ax1.patch.set_visible(False)

        handles, labels = ax1.get_legend_handles_labels()
        handles2, labels2 = ax2.get_legend_handles_labels()
        del handles[1]
        del labels[1]
        handles += handles2
        labels += labels2
        order = [3, 1, 4, 0, 5, 2]
        ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)

        ax1.yaxis.label.set_color(sns.color_palette('muted')[0])
        ax2.yaxis.label.set_color(sns.color_palette('muted')[2])
    
    else:
        lbl = 'Power (resampled, step plot)' if mode == 'power' else 'Power (raw data, 1 second)'
        ax1.plot(x, y, drawstyle='steps-post', label=lbl, zorder=3)
        tou_shade(ax1, x[0])
        ax1.set_ylabel('Power (kW)')
        if y_lim is not None:
            ax1.set_ylim([0, y_lim])
        
        handles, labels = ax1.get_legend_handles_labels()
        order = [1, 0, 2, 3]
        ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)
    
    ax1.set_xlabel('Time of day')
    if mode != 'power' and mode != 'power_raw':
        ax1.yaxis.set_ticks(np.arange(0, 101, 10))
    ax1.xaxis.set_major_locator(mdates.HourLocator())
    ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
    fig.autofmt_xdate(rotation=90)
    ax1.set_xlim((min(x), max(x)))

    ax1.grid(which='major', alpha=0.5)
    
    fig.tight_layout()
    
    if save_fig:
        if mode == 'power_raw':
            chart_type = 'power_raw'
        elif mode == 'power':
            chart_type = 'power_resampled'
        else:
            chart_type = 'dam_level_and_status'
                
        filename = 'output/CS{}_{}_{}_{}.png'.format(case_study, level_name, factor, chart_type)
        fig.savefig(filename, bbox_inches='tight')
    
    return fig

1-factor


In [27]:
df = pd.read_csv(r'..\..\simulations\Case_study_2\output\CS2_simulation_data_export_1-factor.csv.gz')
df['time_clean'] = pd.to_datetime(df['seconds'], unit='s')
df.head(5)


Out[27]:
seconds 27L Level 27L Status 12L Level 12L Status Eskom ToU Pump system total power time_clean
0 0 62.774885 2.0 43.489583 3.0 3 13821.0 1970-01-01 00:00:00
1 1 62.775565 2.0 43.487235 3.0 3 13821.0 1970-01-01 00:00:01
2 2 62.776246 2.0 43.484887 3.0 3 13821.0 1970-01-01 00:00:02
3 3 62.776926 2.0 43.482539 3.0 3 13821.0 1970-01-01 00:00:03
4 4 62.777606 2.0 43.480191 3.0 3 13821.0 1970-01-01 00:00:04

In [28]:
for c in df.columns:
    if 'Level' in c:
        print('{} Min: {}%  Max: {}%'.format(c.replace(' Level', ''), df[c].min(), df[c].max()))


27L Min: 24.985448049954787%  Max: 73.56603136848814%
12L Min: 24.99436643003447%  Max: 80.00348510880113%

Plot dam level and status


In [29]:
fig = sim_plot(2, 1, 'level', '27L', [25, 85], df['time_clean'], None, y_lim=100, save_fig=True)



In [30]:
fig = sim_plot(2, 1, 'level', '12L', [25, 85], df['time_clean'], None, y_lim=100, save_fig=True)


Plot power

Raw data


In [31]:
fig = sim_plot(2, 1, 'power_raw', None, None, df['time_clean'], df['Pump system total power'], y_lim=None, save_fig=True)


Resampled data


In [32]:
resampled = pd.DataFrame()
resampled["total_30_minute"] = df.set_index('time_clean')['Pump system total power'].resample('30T').mean()
resampled.to_csv('output/CS2_resampled_power_1_factor.csv')

fig, ax1 = plt.subplots(figsize=figure_size, dpi=1200)
#fig.patch.set_facecolor('grey')

x = resampled.index
y = resampled['total_30_minute']

ax1.plot(x,y, 'x', label="Power (data points resampled to 30 minutes)", zorder=4)
ax1.plot(x,y, '--', label="Power (resampled, line plot)", zorder=2)
ax1.plot(x,y, drawstyle='steps-post', label="Power (resampled, step plot)", zorder=3)
ax1.set_xlabel('Time of day')
ax1.set_ylabel('Power consumption (kW)')
#ax1.set_ylim([0, 4000])

ax1.axvspan('1970-01-01 00:00:00','1970-01-01 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
ax1.axvspan('1970-01-01 06:00:00','1970-01-01 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
ax1.axvspan('1970-01-01 07:00:00','1970-01-01 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
ax1.axvspan('1970-01-01 10:00:00','1970-01-01 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('1970-01-01 18:00:00','1970-01-01 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
ax1.axvspan('1970-01-01 20:00:00','1970-01-01 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('1970-01-01 22:00:00','1970-01-01 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)

ax1.xaxis.set_major_locator(mdates.HourLocator())
ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate(rotation=90)
ax1.set_xlim((min(x),max(x)))

ax1.grid(which='major', alpha=0.5)
#ax1.grid(which='minor', alpha=0.25)

plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=2, mode="expand", borderaxespad=0.)

fig.tight_layout()
plt.show()
fig.savefig('output/CS2_1_power_resampled_all.png', bbox_inches='tight')
plt.close('all')



In [33]:
fig = sim_plot(2, 1, 'power', None, None, resampled.index, resampled['total_30_minute'], y_lim=None, save_fig=True)


2-factor


In [34]:
df = pd.read_csv(r'..\..\simulations\Case_study_2\output\CS2_simulation_data_export_2-factor.csv.gz')
df['time_clean'] = pd.to_datetime(df['seconds'], unit='s')
df.head(5)


Out[34]:
seconds 27L Level 27L Status 12L Level 12L Status Eskom ToU Pump system total power time_clean
0 0 62.774885 2.0 43.489583 3.0 3 13821.0 1970-01-01 00:00:00
1 1 62.775565 2.0 43.487235 3.0 3 13821.0 1970-01-01 00:00:01
2 2 62.776246 2.0 43.484887 3.0 3 13821.0 1970-01-01 00:00:02
3 3 62.776926 2.0 43.482539 3.0 3 13821.0 1970-01-01 00:00:03
4 4 62.777606 2.0 43.480191 3.0 3 13821.0 1970-01-01 00:00:04

In [35]:
for c in df.columns:
    if 'Level' in c:
        print('{} Min: {}%  Max: {}%'.format(c.replace(' Level', ''), df[c].min(), df[c].max()))


27L Min: 24.985448049954787%  Max: 73.56603136848814%
12L Min: 24.99436643003447%  Max: 80.00348510880113%

Plot dam level and status


In [36]:
fig = sim_plot(2, 2, 'level', '27L', [25, 85], df['time_clean'], None, y_lim=100, save_fig=True)



In [37]:
fig = sim_plot(2, 2, 'level', '12L', [25, 85], df['time_clean'], None, y_lim=100, save_fig=True)


Plot power


In [38]:
resampled = pd.DataFrame()
resampled["total_30_minute"] = df.set_index('time_clean')['Pump system total power'].resample('30T').mean()
resampled.to_csv('output/CS2_resampled_power_2_factor.csv')

fig = sim_plot(2, 2, 'power', None, None, resampled.index, resampled['total_30_minute'], y_lim=None, save_fig=True)


n-factor


In [39]:
df = pd.read_csv(r'..\..\simulations\Case_study_2\output\CS2_simulation_data_export_n-factor.csv.gz')
df['time_clean'] = pd.to_datetime(df['seconds'], unit='s')
df.head(5)


Out[39]:
seconds 27L Level 27L Status 12L Level 12L Status Eskom ToU Pump system total power time_clean
0 0 62.774885 2.0 43.489583 3.0 3 13821.0 1970-01-01 00:00:00
1 1 62.775565 2.0 43.487235 3.0 3 13821.0 1970-01-01 00:00:01
2 2 62.776246 2.0 43.484887 3.0 3 13821.0 1970-01-01 00:00:02
3 3 62.776926 2.0 43.482539 3.0 3 13821.0 1970-01-01 00:00:03
4 4 62.777606 2.0 43.480191 3.0 3 13821.0 1970-01-01 00:00:04

In [40]:
for c in df.columns:
    if 'Level' in c:
        print('{} Min: {}%  Max: {}%'.format(c.replace(' Level', ''), df[c].min(), df[c].max()))


27L Min: 29.994697111926893%  Max: 73.56603136848814%
12L Min: 31.13030737900091%  Max: 80.00198402436575%

Plot dam level and status


In [41]:
fig = sim_plot(2, 'n', 'level', '27L', [25, 85], df['time_clean'], None, y_lim=100, save_fig=True)



In [42]:
fig = sim_plot(2, 'n', 'level', '12L', [25, 85], df['time_clean'], None, y_lim=100, save_fig=True)


Plot power


In [43]:
resampled = pd.DataFrame()
resampled["total_30_minute"] = df.set_index('time_clean')['Pump system total power'].resample('30T').mean()
resampled.to_csv('output/CS2_resampled_power_n_factor.csv')

fig = sim_plot(2, 'n', 'power', None, None, resampled.index, resampled['total_30_minute'], y_lim=None, save_fig=True)


Scoring graph


In [44]:
scores = pd.read_csv('scoring_results.csv').set_index('Score')
scores


Out[44]:
1-factor 2-factor n-factor
Score
Feature 16.666667 20.370370 33.333333
Performance 36.712851 36.712851 63.348608

In [45]:
sns.set()

score_sim = (scores['1-factor']['Performance'], scores['2-factor']['Performance'], scores['n-factor']['Performance'])
score_feat = (scores['1-factor']['Feature'], scores['2-factor']['Feature'], scores['n-factor']['Feature'])
ind = np.arange(len(score_sim))    # the x locations for the groups
width = 0.5       # the width of the bars: can also be len(x) sequence

plt.figure(figsize=figure_size)

p1 = plt.bar(ind, score_sim, width)
p2 = plt.bar(ind, score_feat, width, bottom=score_sim)


plt.ylabel('Scores')
plt.xlabel('Control system')
plt.xticks(ind, ('1-factor', '2-factor', r'$n$-factor'))
#plt.yticks(np.arange(0, 101, 10))
plt.gca().yaxis.set_major_locator(plt.NullLocator())
#plt.legend((p1[0], p2[0]), ('Performance score', 'Feature score'), loc='best')
plt.legend((p1[0], p2[0]), ('Performance score', 'Feature score'), bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=2, mode="expand", borderaxespad=0.)

for r1, r2 in zip(p1, p2):
    h1 = r1.get_height()
    h2 = r2.get_height()
    plt.text(r1.get_x() + r1.get_width() / 2., h1 / 2., "%.2f" % h1, ha="center", va="center", color='white')
    plt.text(r2.get_x() + r2.get_width() / 2., h1 + h2 / 2., "%.2f" % h2, ha="center", va="center", color='white')
    plt.text(r2.get_x() + r2.get_width() / 2., h1 + h2 + 1.5, "%.2f" % (h1 + h2),
         ha="center", va="center", color='black', weight='bold')


plt.grid(False)

plt.tight_layout()

#plt.show()

plt.savefig('output/CS2_final_scores.png', bbox_inches='tight', dpi=1200)
plt.close('all')

In [ ]: